The composted tannery sludge (CTS) dataset from Prof. Ademir Araujo asfaruaj@yahoo.com.br from Federal University of PiauÃ. Sequence data was analyzed by Dr. Lucas William Mendes lucaswmendes@gmail.com from University of Sao Paulo.
rm(list=ls())
library(tidyverse)
library(vegan)
library(ape)
library(RColorBrewer)
library(ggpubr)
library(ggpmisc)
mytheme <- theme_bw()+
theme(panel.spacing = unit(0, "lines"),
strip.background = element_blank(),
strip.placement = "outside",
legend.box.background = element_rect(),
legend.box.margin = margin(1, 1, 1, 1),
legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# feature table
com <- read.csv("out_table_rarefied.csv", header = 1, row.names = 1, sep = ",")
com <- t(com)
str(com)
## int [1:74, 1:17872] 480 324 458 704 612 653 955 590 458 706 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
## ..$ : chr [1:17872] "649b4c705852cc03df6784a773e7b8e7" "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" ...
knitr::kable(
com[1:5, 1:2],
caption = "how the OTU table looks"
)
| 649b4c705852cc03df6784a773e7b8e7 | b7c7e3f5733da18123882ca1001824cc | |
|---|---|---|
| T1_0a | 480 | 133 |
| T1_0b | 324 | 142 |
| T1_0c | 458 | 138 |
| T1_150a | 704 | 181 |
| T1_150b | 612 | 217 |
cat("the OTU/feature table has been rarefied to", rarefactiondepth <- mean(rowSums(com)))
## the OTU/feature table has been rarefied to 14500
The number of samples is: 74; the number of species/ASVs is: 17872; the range of sequence number among samples is: 1, 4.9133^{4}.
metadata <- read.csv("metadata.csv", header = 1, row.names = 1, sep = ",")
dim(metadata)
## [1] 74 2
head(metadata)
## Treat Day
## T1_0a T1 0
## T1_0b T1 0
## T1_0c T1 0
## T1_150a T1 150
## T1_150b T1 150
## T1_150c T1 150
taxonomy <- read.csv("taxonomy_splited_columns.csv", header = 1, row.names = 1, sep = ",")
dim(taxonomy)
## [1] 20939 7
head(taxonomy)
## Domain Phylum Class
## 649b4c705852cc03df6784a773e7b8e7 Bacteria Firmicutes Bacilli
## b7c7e3f5733da18123882ca1001824cc Bacteria Firmicutes Bacilli
## 87e24a4c32d23bd13e47081d40f9cd48 Bacteria Chloroflexi KD4-96
## 286519bf837f34d632593c879b356650 Bacteria Proteobacteria Alphaproteobacteria
## a3805d6b05f8f5f2115b4467222a9186 Bacteria Firmicutes Bacilli
## 27af035245a17ed534476e8517ed27b4 Bacteria Firmicutes Bacilli
## Order Family Genus
## 649b4c705852cc03df6784a773e7b8e7 Bacillales
## b7c7e3f5733da18123882ca1001824cc Bacillales Bacillaceae Bacillus
## 87e24a4c32d23bd13e47081d40f9cd48
## 286519bf837f34d632593c879b356650 Rhizobiales Beijerinckiaceae Microvirga
## a3805d6b05f8f5f2115b4467222a9186 Bacillales Bacillaceae Bacillus
## 27af035245a17ed534476e8517ed27b4 Bacillales Paenibacillaceae Ammoniphilus
## Species
## 649b4c705852cc03df6784a773e7b8e7
## b7c7e3f5733da18123882ca1001824cc
## 87e24a4c32d23bd13e47081d40f9cd48
## 286519bf837f34d632593c879b356650
## a3805d6b05f8f5f2115b4467222a9186
## 27af035245a17ed534476e8517ed27b4 _uncultured bacterium
For more details about ‘sample specific cutoffs’ please check Jia, X., Dini-Andreote, F. & Salles, J.F. Unravelling the interplay of ecological processes structuring the bacterial rare biosphere. ISME COMMUN. 2, 96 (2022). https://doi.org/10.1038/s43705-022-00177-6
# source the TruncateTable FUNCTION
source("TruncateTable.r")
# The truncated datasets can be stored as follows:
dominant <- TruncateTable(com, typem = "dominant")
str(dominant)
## num [1:74, 1:471] 480 324 458 704 612 653 955 590 458 706 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
## ..$ : chr [1:471] "649b4c705852cc03df6784a773e7b8e7" "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" ...
# write.csv(t(dominant), "OTU_table_dominant.csv")
rare <-TruncateTable(com, typem = "rare")
str(rare)
## num [1:74, 1:17858] 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
## ..$ : chr [1:17858] "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" "a3805d6b05f8f5f2115b4467222a9186" ...
# write.csv(t(rare), "OTU_table_rare.csv")
# Combine rare and dominant biosphere
row.names(dominant) <- paste("dominant", row.names(dominant), sep = "_")
dominant <- as.data.frame(t(dominant))
row.names(rare) <- paste("rare", row.names(rare), sep = "_")
rare <- as.data.frame(t(rare))
# combine rare & dominant biospheres
com_rare_dominant <- transform(merge(dominant, rare, by = "row.names", all = TRUE), row.names = Row.names, Row.names = NULL)
com_rare_dominant[is.na(com_rare_dominant)] <- 0
dim(com_rare_dominant)
## [1] 17872 148
com_rare_dominant[1:2, 1:3]
## dominant_T1_0a dominant_T1_0b dominant_T1_0c
## 000dd624e20d639541481236d8e7569f 0 0 0
## 00107421598fe0eeae292ce70483b4b5 0 0 0
df <- as.data.frame(colSums(com_rare_dominant))
df$ab <- df[, 1]
df <- df %>%
rownames_to_column(var = "col") %>%
separate(col, into = c("Biosphere", "Treat", "Day"), sep = "_") %>%
mutate(Day = gsub('.{1}$', '', Day)) %>%
select(-4)
# calculate relative abundance
df$ab <- df$ab*100/rarefactiondepth
# change Treat info
df1 <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))
df1$Day <- as.numeric(as.character(df1$Day))
head(df1)
## Biosphere Treat Day ab Biospheres
## 1 dominant 0 ton CTS/ha 0 27.64828 Dominant biosphere
## 2 dominant 0 ton CTS/ha 0 21.67586 Dominant biosphere
## 3 dominant 0 ton CTS/ha 0 22.82069 Dominant biosphere
## 4 dominant 0 ton CTS/ha 150 30.54483 Dominant biosphere
## 5 dominant 0 ton CTS/ha 150 23.51034 Dominant biosphere
## 6 dominant 0 ton CTS/ha 150 29.15172 Dominant biosphere
# line plot
(p <- ggplot(df1, aes(x = Day, y = ab)) +
geom_smooth(method = lm, se = TRUE, size = .6, aes(color = Treat)) +
geom_point(aes(color = Treat), size = 3, alpha = .8) +
scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
facet_grid(Biospheres ~ Treat, scales = "free_y") +
stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
scale_x_continuous(breaks = c(0, 45, 75, 150, 180)) +
labs(title = "", x = "Day", y = "Relative abundance (%)") +
mytheme +
theme(text = element_text(size = 15)))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## `geom_smooth()` using formula = 'y ~ x'
# ggsave("Total_relative_abundance_rare_dominant_biospheres.pdf", width = 20, height = 8, units = "cm", p, scale = 1.7)
# ggsave("Total_relative_abundance_rare_dominant_biospheres.jpg", width = 20, height = 8, units = "cm", p, scale = 1.7)
# change Treat info
df2 <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0", "2.5", "5", "10", "20"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))
df2$Treat <- as.numeric(as.character(df2$Treat))
head(df2)
## Biosphere Treat Day ab Biospheres
## 1 dominant 0 0 27.64828 Dominant biosphere
## 2 dominant 0 0 21.67586 Dominant biosphere
## 3 dominant 0 0 22.82069 Dominant biosphere
## 4 dominant 0 150 30.54483 Dominant biosphere
## 5 dominant 0 150 23.51034 Dominant biosphere
## 6 dominant 0 150 29.15172 Dominant biosphere
# line plot
(p <- ggplot(df2, aes(x = Treat, y = ab)) +
geom_smooth(method = lm, se = TRUE, size = .6, aes(color = factor(Treat))) +
geom_point(aes(color = factor(Treat)), size = 3, alpha = .8) +
scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
scale_x_continuous(breaks = c(0, 2.5, 5, 10, 20)) +
facet_grid(Biospheres ~ Day, scales = "free_y") +
labs(title = "", x = "Treatment", y = "Relative abundance (%)") +
mytheme +
theme(text = element_text(size = 15)))
## `geom_smooth()` using formula = 'y ~ x'
# ggsave("Total_relative_abundance_rare_dominant_biospheres_treat.pdf", width = 20, height = 8, units = "cm", p, scale = 1.7)
# ggsave("Total_relative_abundance_rare_dominant_biospheres_treat.jpg", width = 20, height = 8, units = "cm", p, scale = 1.7)
richness <- specnumber(t(com_rare_dominant))
df <- data.frame(richness)
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
# get average per group
df1 <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>%
group_by(Biospheres, Treat, Day) %>%
summarise(avg = mean(richness, na.rm=T)) %>%
arrange(Day, rev(Biospheres)) %>%
group_by(Day, Treat) %>%
mutate(label_y = cumsum(avg) - 0.5 * avg)
## `summarise()` has grouped output by 'Biospheres', 'Treat'. You can override
## using the `.groups` argument.
df1$avg <- as.integer(df1$avg)
# stacked-bar plot
(p <- ggplot(df1, aes(x = Day, y = avg, fill = Biospheres)) +
geom_col(color = "black", width = 0.8, lwd = 0.1) +
scale_y_continuous(expand = c(0, 8)) +
facet_grid(. ~ Treat) +
labs(x = "Days", y = "Richness (NO. of OTUs)", title = "") +
geom_text(aes(y = label_y, label = avg), colour = "white", size = 2) +
scale_fill_brewer(palette = "Pastel1") +
mytheme)
# ggsave("Richness_stacked_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# line plot
df2 <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))
pd <- position_dodge(0.2)
(p <- ggplot(df2, aes(x = Day, y = richness, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_wrap(.~Biospheres, scales = "free_y") +
labs(title = "", x = "Day", y = "Richness (NO. of OTUs)") +
mytheme)
# ggsave("Richness_lineplot_freey_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
(p <- ggplot(df2, aes(x = Day, y = richness, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_grid(.~Biospheres) +
labs(title = "", x = "Day", y = "Richness (NO. of OTUs)") +
mytheme)
# ggsave("Richness_lineplot_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1_optional.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1_optional.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# dominant biosphere
dist <- vegdist(t(dominant), method = "bray", binary = FALSE, diag = 1)
re <- pcoa(dist, correction = "none", rn = NULL)
df <- re$vectors[, 1:2]
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
df <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180")))
# two way permanova
set.seed(123)
result <- adonis(dist ~ Treat*Day, data = df, permutation = 9999)
## 'adonis' will be deprecated: use 'adonis2' instead
result$aov.tab
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treat 4 1.4377 0.35943 2.8552 0.11571 0.0001 ***
## Day 4 2.7843 0.69608 5.5294 0.22409 0.0001 ***
## Treat:Day 16 2.0344 0.12715 1.0101 0.16374 0.4378
## Residuals 49 6.1684 0.12589 0.49646
## Total 73 12.4249 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# write.csv(result$aov.tab, "PERMANOVA_dominant.csv")
(p1 <- ggplot(df, aes(Axis.1, Axis.2, shape = Treat, color = Day)) +
geom_point(size = 3, alpha = 0.7) +
labs(x = paste("PCoA1 (", round(re$values$Relative_eig[1] * 100, 2), "%)", sep = ""),
y = paste("PCoA2 (", round(re$values$Relative_eig[2] * 100, 2), "%)", sep = ""),
title = "Dominant biosphere") +
scale_color_brewer(palette="Dark2") +
scale_shape_manual(values = c(1, 0, 15, 16, 17), name = "CTS concentration") +
theme_bw())
# rare biosphere
dist <- vegdist(t(rare), method = "bray", binary = FALSE, diag = 1)
re <- pcoa(dist, correction = "none", rn = NULL)
df <- re$vectors[, 1:2]
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
df <- df %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180")))
# two way permanova
set.seed(123)
result <- adonis(dist ~ Treat*Day, data = df, permutation = 9999)
## 'adonis' will be deprecated: use 'adonis2' instead
result$aov.tab
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treat 4 3.0062 0.75155 3.3437 0.15043 0.0001 ***
## Day 4 2.3690 0.59224 2.6349 0.11854 0.0001 ***
## Treat:Day 16 3.5957 0.22473 0.9998 0.17992 0.4774
## Residuals 49 11.0135 0.22477 0.55111
## Total 73 19.9843 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# write.csv(result$aov.tab, "PERMANOVA_rare.csv")
(p2 <- ggplot(df, aes(Axis.1, Axis.2, shape = Treat, color = Day)) +
geom_point(size = 3, alpha = 0.7) +
labs(x = paste("PCoA1 (", round(re$values$Relative_eig[1] * 100, 2), "%)", sep = ""),
y = paste("PCoA2 (", round(re$values$Relative_eig[2] * 100, 2), "%)", sep = ""),
title = "Rare biosphere") +
scale_color_brewer(palette="Dark2") +
scale_shape_manual(values = c(1, 0, 15, 16, 17), name = "CTS concentration") +
theme_bw())
p <- ggarrange(p1, p2, labels = c("A", "B"), common.legend = TRUE, legend = "right", ncol = 2)
# ggsave("PCoA_rare_dominant.pdf", width = 16, height = 7.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_2.pdf", width = 16, height = 7.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_2.jpg", width = 16, height = 7.5, units = "cm", p, scale = 1.5)
# species composition of the rare and dominant biosphere at Phyla level (relative abundance)
# add taxonomy info
df <- transform(merge(taxonomy[, -c(1, 4:7)], com_rare_dominant, by = "row.names"), row.names = Row.names, Row.names = NULL)
df <- df %>% select(-Class)
warning("double check is the table rarified or not?")
## Warning: double check is the table rarified or not?
(rarefactiondepth <- mean(rowSums(com)))
## [1] 14500
# combine OTUs in the same phylum
df <- aggregate(df[,-1], list(df$Phylum), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- 100*df/rarefactiondepth
df[1:2, 1:4]
## dominant_T1_0a dominant_T1_0b dominant_T1_0c dominant_T1_150a
## Acidobacteria 8.068966 6.524138 5.786207 1.710345
## Actinobacteria 1.165517 4.634483 1.979310 11.655172
row.names(df)
## [1] "Acidobacteria" "Actinobacteria" "Armatimonadetes"
## [4] "Bacteroidetes" "BRC1" "Chlamydiae"
## [7] "Chloroflexi" "Cyanobacteria" "Dadabacteria"
## [10] "Deinococcus-Thermus" "Dependentiae" "Elusimicrobia"
## [13] "Entotheonellaeota" "Epsilonbacteraeota" "FBP"
## [16] "FCPU426" "Fibrobacteres" "Firmicutes"
## [19] "Fusobacteria" "GAL15" "Gemmatimonadetes"
## [22] "Halanaerobiaeota" "Hydrogenedentes" "Kiritimatiellaeota"
## [25] "Latescibacteria" "Nitrospirae" "Omnitrophicaeota"
## [28] "Patescibacteria" "Planctomycetes" "Proteobacteria"
## [31] "Rokubacteria" "Spirochaetes" "Tenericutes"
## [34] "Verrucomicrobia" "WPS-2" "WS2"
all phyla
# transpose the dataframe
df1 <- t(df)
df1 <- as.data.frame(df1)
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df1), t(as.data.frame(strsplit(as.character(row.names(df1)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df1.1 <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df1)
# pivot data frame from wide to long
df1.2 <- df1.1 %>%
pivot_longer(cols = 4:ncol(df1.1),
names_to = "Phyla",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))
head(df1.2)
## # A tibble: 6 × 5
## Biosphere Treat Day Phyla value
## <fct> <fct> <fct> <chr> <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0 Acidobacteria 8.07
## 2 Dominant biosphere 0 ton CTS/ha 0 Actinobacteria 1.17
## 3 Dominant biosphere 0 ton CTS/ha 0 Armatimonadetes 0
## 4 Dominant biosphere 0 ton CTS/ha 0 Bacteroidetes 0.621
## 5 Dominant biosphere 0 ton CTS/ha 0 BRC1 0
## 6 Dominant biosphere 0 ton CTS/ha 0 Chlamydiae 0
# write_csv(df1.2, "phyla_composition_all_rare_dominnat.csv")
# pivot data frame from wide to long
df1.1 <- df1.1 %>%
pivot_longer(cols = 4:ncol(df1.1),
names_to = "Phyla",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>%
group_by(Biosphere, Treat, Day, Phyla) %>%
summarise(avg = mean(value, na.rm=T))
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
head(df1.1)
## # A tibble: 6 × 5
## # Groups: Biosphere, Treat, Day [1]
## Biosphere Treat Day Phyla avg
## <fct> <fct> <fct> <chr> <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0 Acidobacteria 6.79
## 2 Dominant biosphere 0 ton CTS/ha 0 Actinobacteria 2.59
## 3 Dominant biosphere 0 ton CTS/ha 0 Armatimonadetes 0
## 4 Dominant biosphere 0 ton CTS/ha 0 Bacteroidetes 0.329
## 5 Dominant biosphere 0 ton CTS/ha 0 BRC1 0
## 6 Dominant biosphere 0 ton CTS/ha 0 Chlamydiae 0
set.seed(112358)
(colourCount = length(colnames(df1)))
## [1] 36
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col <- sample(col_vector, colourCount)
# pie(rep(1,colourCount), col=sample(col_vector, colourCount))
# stacked-bar plot
(p <- ggplot(df1.1, aes(x = Day, y = avg, fill = Phyla)) +
geom_bar(stat = "identity", width = 0.8) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = col) +
facet_grid(Biosphere ~ Treat) +
labs(x = "Days", y = "Relative abundance (%)", title = "") +
guides(fill = guide_legend(title="Phyla", reverse = TRUE)) +
mytheme)
ggsave("phyla_stacked_barplots_all_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
only keep phyla with average relative abundance > 0.1%
# only keep phyla with average relative abundance > 0.1%
df2 <- df
df2$mean <- rowMeans(df2)
df2 <- subset(df2, mean > 0.1)
df2 <- df2[with(df2, order(mean)), ]
df2$mean <- NULL
# phyla that abundance more than 0.1%
row.names(df2)
## [1] "Dependentiae" "Armatimonadetes" "Cyanobacteria" "Nitrospirae"
## [5] "Patescibacteria" "Rokubacteria" "Bacteroidetes" "Verrucomicrobia"
## [9] "Gemmatimonadetes" "Planctomycetes" "Chloroflexi" "Acidobacteria"
## [13] "Firmicutes" "Proteobacteria" "Actinobacteria"
# transpose the dataframe
df2 <- t(df2)
df2 <- as.data.frame(df2)
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df2), t(as.data.frame(strsplit(as.character(row.names(df2)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df3 <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df2)
# pivot data frame from wide to long
df3 <- df3 %>%
pivot_longer(cols = 4:ncol(df3),
names_to = "Phyla",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>%
group_by(Biosphere, Treat, Day, Phyla) %>%
summarise(avg = mean(value, na.rm=T)) # summarise_at(vars("PM25", "Ozone", "CO2"), mean); or summarise(across(PM25:CO2, mean))
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
head(df3)
## # A tibble: 6 × 5
## # Groups: Biosphere, Treat, Day [1]
## Biosphere Treat Day Phyla avg
## <fct> <fct> <fct> <chr> <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0 Acidobacteria 6.79
## 2 Dominant biosphere 0 ton CTS/ha 0 Actinobacteria 2.59
## 3 Dominant biosphere 0 ton CTS/ha 0 Armatimonadetes 0
## 4 Dominant biosphere 0 ton CTS/ha 0 Bacteroidetes 0.329
## 5 Dominant biosphere 0 ton CTS/ha 0 Chloroflexi 1.20
## 6 Dominant biosphere 0 ton CTS/ha 0 Cyanobacteria 0
set.seed(112358)
(colourCount = length(colnames(df2)))
## [1] 15
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col <- sample(col_vector, colourCount)
# pie(rep(1,colourCount), col=sample(col_vector, colourCount))
# stacked-bar plot
(p <- ggplot(df3, aes(x = Day, y = avg, fill = Phyla)) +
geom_bar(stat = "identity", width = 0.8) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = col) +
facet_grid(Biosphere ~ Treat) +
labs(x = "Days", y = "Relative abundance (%)", title = "") +
guides(fill = guide_legend(title="Phyla", reverse = TRUE)) +
mytheme)
# ggsave("phyla_stacked_barplots_more_than_0.1_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
first find the most types (except transiently rare)
# find all taxa belongs to the rare biosphere
taxa_rare <- row.names(rare)
taxa_dominant <- row.names(dominant)
# create a data.frame that
df_types <- data.frame(OTU_ID = colnames(com),
Types = NA)
conditionally_rare_dominant <- taxa_dominant[taxa_dominant %in% taxa_rare]
permanently_dominant <- taxa_dominant[!taxa_dominant %in% taxa_rare]
only_rare <- taxa_rare[!taxa_rare %in% taxa_dominant]
cat("the number of OTUs in the dominant biosphere is", length(taxa_dominant),
"\nthe number of OTUs in the rare biosphere is", length(taxa_rare),
"\nthe number of conditionally rare/dominant OTUs is", length(conditionally_rare_dominant),
"\nthe number of OTUs that only apears in the dominant biosphere is", length(permanently_dominant),
"\nthe number of rare OTUs that not appear in the dominant biosphere is", length(only_rare))
## the number of OTUs in the dominant biosphere is 471
## the number of OTUs in the rare biosphere is 17858
## the number of conditionally rare/dominant OTUs is 457
## the number of OTUs that only apears in the dominant biosphere is 14
## the number of rare OTUs that not appear in the dominant biosphere is 17401
df_types$Types[df_types$OTU_ID %in% conditionally_rare_dominant] <- "conditionally_rare_dominant"
df_types$Types[df_types$OTU_ID %in% only_rare] <- "only_rare"
df_types$Types[df_types$OTU_ID %in% permanently_dominant] <- "permanently_dominant"
df_types <- df_types %>%
# mutate(Types = factor(Types)) %>%
mutate(OTU_ID = as.character(OTU_ID))
head(df_types)
## OTU_ID Types
## 1 649b4c705852cc03df6784a773e7b8e7 permanently_dominant
## 2 b7c7e3f5733da18123882ca1001824cc conditionally_rare_dominant
## 3 87e24a4c32d23bd13e47081d40f9cd48 conditionally_rare_dominant
## 4 286519bf837f34d632593c879b356650 conditionally_rare_dominant
## 5 a3805d6b05f8f5f2115b4467222a9186 conditionally_rare_dominant
## 6 27af035245a17ed534476e8517ed27b4 conditionally_rare_dominant
table(df_types$Types)
##
## conditionally_rare_dominant only_rare
## 457 17401
## permanently_dominant
## 14
# write.csv(df_types, "OTU_ID_and_types.csv")
different types of rarity and dominance in a facet plot
com_classified <- com_rare_dominant %>%
rownames_to_column(var = "OTU_ID")
com_classified <- left_join(df_types, com_classified, by = "OTU_ID") %>%
column_to_rownames(var = "OTU_ID")
# last step - define the "transiently rare"
com_classified$Types[which(rowSums(com_classified[, -1] != 0) == 1 & com_classified$Types != "conditionally_rare_dominant "& com_classified$Types != "permanently_dominant") ] <- "transiently_rare"
com_classified[1:2, 1:3]
## Types dominant_T1_0a
## 649b4c705852cc03df6784a773e7b8e7 permanently_dominant 480
## b7c7e3f5733da18123882ca1001824cc conditionally_rare_dominant 133
## dominant_T1_0b
## 649b4c705852cc03df6784a773e7b8e7 324
## b7c7e3f5733da18123882ca1001824cc 142
df <- com_classified %>%
group_by(Types) %>%
summarise(across(everything(), list(sum))) %>%
column_to_rownames(var = "Types")
df <- as.data.frame(t(df))
# calculate relative abundance
df <- 100*df/rarefactiondepth
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
# pivot data frame from wide to long
df2 <- df1 %>%
pivot_longer(cols = 4:ncol(df1),
names_to = "Types",
values_to = "value")
df2$Types[df2$Types == "conditionally_rare_dominant" & df2$Biosphere == "dominant"] <- "conditionally_dominant"
df2$Types[df2$Types == "conditionally_rare_dominant" & df2$Biosphere == "rare"] <- "conditionally_rare"
df2 <- df2 %>%
select(-Biosphere) %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Types = factor(Types, levels = c("conditionally_dominant", "permanently_dominant", "only_rare", "conditionally_rare", "transiently_rare"), labels = c("Conditionally dominant", "Permanently dominant", "Permanently rare", "Conditionally rare", "Transiently rare"))) %>%
filter(value > 0)
df2$Day <- as.numeric(as.character(df2$Day))
head(df2)
## # A tibble: 6 × 4
## Treat Day Types value
## <fct> <dbl> <fct> <dbl>
## 1 0 ton CTS/ha 0 Conditionally dominant 21.2
## 2 0 ton CTS/ha 0 Permanently dominant 6.44
## 3 0 ton CTS/ha 0 Conditionally dominant 18.0
## 4 0 ton CTS/ha 0 Permanently dominant 3.68
## 5 0 ton CTS/ha 0 Conditionally dominant 19.7
## 6 0 ton CTS/ha 0 Permanently dominant 3.16
str(df2)
## tibble [370 × 4] (S3: tbl_df/tbl/data.frame)
## $ Treat: Factor w/ 5 levels "0 ton CTS/ha",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Day : num [1:370] 0 0 0 0 0 0 150 150 150 150 ...
## $ Types: Factor w/ 5 levels "Conditionally dominant",..: 1 2 1 2 1 2 1 2 1 2 ...
## $ value: num [1:370] 21.21 6.44 17.99 3.68 19.66 ...
# line plot
(p <- ggplot(df2, aes(x = Day, y = value)) +
geom_smooth(method = lm, se = TRUE, size = .6, aes(color = Treat)) +
geom_point(aes(color = Treat), size = 3, alpha = .8) +
scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
facet_grid(Types~Treat, scales="free_y") +
stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
scale_x_continuous(breaks=c(0, 45, 75, 150, 180)) +
labs(title = "", x = "Day", y = "Relative abundance (%)") +
mytheme +
theme(text = element_text(size = 15)))
## `geom_smooth()` using formula = 'y ~ x'
## Warning in ci_f_ncp(stat, df1 = df1, df2 = df2, probs = probs): Upper limit
## outside search range. Set to the maximum of the parameter range.
## Warning: Computation failed in `stat_poly_eq()`
## Caused by error in `check_output()`:
## ! out[1] <= out[2] is not TRUE
# ggsave("Figure_5_new.pdf", width = 20, height = 15, units = "cm", p, scale = 1.7)
# ggsave("Figure_5_new.jpg", width = 20, height = 15, units = "cm", p, scale = 1.7)
dominant biosphere
# add rarify classification to the dominant OTU table
dominant_classified <- dominant %>%
rownames_to_column(var = "OTU_ID")
dominant_classified <- left_join(dominant_classified, df_types, by = "OTU_ID") %>%
column_to_rownames(var = "OTU_ID")
# define the "transiently dominant"
# dominant_classified$Types[which(rowSums(dominant_classified[,-ncol(dominant_classified)] != 0) == 1)] = "transiently_dominant"
# conditionally_rare_dominant
# 236
# permanently_dominant
# 1
# transiently_dominant
# 234
# write.csv(dominant_classified, "OTU_table_dominant_with_types.csv")
# summarize sequence number per type
df <- aggregate(dominant_classified[,-ncol(dominant_classified)], list(dominant_classified$Types), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- as.data.frame(t(df))
# calculate relative abundance
df <- 100*df/rarefactiondepth
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
# pivot data frame from wide to long
df2 <- df1 %>%
pivot_longer(cols = 4:ncol(df1),
names_to = "Types",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Types = factor(Types, levels = c("conditionally_rare_dominant", "permanently_dominant"), labels = c("Conditionally dominant", "Permanently dominant"))) %>%
group_by(Biosphere, Treat, Day, Types) %>%
summarise(avg = mean(value, na.rm=T))
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
df2$avg <- as.integer(df2$avg)
my_palette = c(brewer.pal(8, "Pastel2")[c(5:6)])
# stacked-bar plot
(p1 <- ggplot(df2, aes(x = Day, y = avg, fill = Types)) +
geom_col(color = "black", width = 0.8, lwd = 0.1) +
scale_y_continuous(expand = c(0, 0), limits=c(0, 80)) +
scale_fill_manual(values = my_palette) +
facet_grid(. ~ Treat) +
labs(x = "Days", y = "Relative abundance (%)", title = "Dominant biosphere") +
guides(fill = guide_legend(title="Types of dominance")) +
mytheme+
theme(legend.position = "bottom"))
# line plot
# pivot data frame from wide to long
df3 <- df1 %>%
pivot_longer(cols = 4:ncol(df1),
names_to = "Types",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Types = factor(Types, levels = c("conditionally_rare_dominant", "permanently_dominant"), labels = c("Conditionally dominant", "Permanently dominant")))
(f1 <- ggplot(df3, aes(x = Day, y = value, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.4, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 2, alpha = 0.9, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_grid(.~Types) +
labs(title = "Dominant biosphere", x = "Day", y = "Relative abundance (%)") +
mytheme)
rare biosphere
# add rarify classification to the rare OTU table
rare_classified <- rare %>%
rownames_to_column(var = "OTU_ID")
rare_classified <- left_join(rare_classified, df_types, by = "OTU_ID") %>%
column_to_rownames(var = "OTU_ID")
# define the "transiently rare"
rare_classified$Types[which(rowSums(rare_classified[,-ncol(rare_classified)] != 0) == 1)] = "transiently_rare"
# write.csv(rare_classified, "OTU_table_rare_with_types.csv")
# summarize sequence number per type
df <- aggregate(rare_classified[,-ncol(rare_classified)], list(rare_classified$Types), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- as.data.frame(t(df))
# calculate relative abundance
df <- 100*df/rarefactiondepth
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)
# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
Treat = as.factor(group_info[,2]),
Day = as.factor(group_info[,3]),
df)
# pivot data frame from wide to long
df2 <- df1 %>%
pivot_longer(cols = 4:ncol(df1),
names_to = "Types",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Types = factor(Types, levels = c("only_rare", "transiently_rare", "conditionally_rare_dominant"), labels = c("Permanently rare", "Transiently rare", "Conditionally rare"))) %>%
group_by(Biosphere, Treat, Day, Types) %>%
summarise(avg = mean(value, na.rm=T))
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
df2$avg <- as.integer(df2$avg)
my_palette = c(brewer.pal(8, "Pastel1")[c(1:3)])
# stacked-bar plot
(p2 <- ggplot(df2, aes(x = Day, y = avg, fill = Types)) +
geom_col(color = "black", width = 0.8, lwd = 0.1) +
scale_y_continuous(expand = c(0, 0), limits=c(0, 80)) +
scale_fill_manual(values = my_palette) +
facet_grid(. ~ Treat) +
labs(x = "Days", y = "Relative abundance (%)", title = "Rare biosphere") +
guides(fill = guide_legend(title="Types of rarity")) +
mytheme +
theme(legend.position = "bottom"))
df3 <- df1 %>%
pivot_longer(cols = 4:ncol(df1),
names_to = "Types",
values_to = "value") %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>%
mutate(Types = factor(Types, levels = c("only_rare", "conditionally_rare_dominant", "transiently_rare"), labels = c("Permanently rare", "Conditionally rare", "Transiently rare")))
(f2 <- ggplot(df3, aes(x = Day, y = value, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.4, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 2, alpha = 0.9, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_grid(.~Types) +
labs(title = "Rare biosphere", x = "Day", y = "Relative abundance (%)") +
mytheme)
p <- ggarrange(p1, p2, labels = c("A", "B"), ncol = 2)
# ggsave("Types_of_rare_dominant_abundance_stacked_barplots.pdf", width = 24, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_5.pdf", width = 24, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_5.jpg", width = 24, height = 8.5, units = "cm", p, scale = 1.5)
f <- ggarrange(f1, f2, labels = c("A", "B"), widths = c(0.8, 1.2), common.legend = TRUE, legend = "right", ncol = 2)
# ggsave("Types_of_rare_dominant_abundance_lineplots_raw.pdf", width = 20, height = 5, units = "cm", f, scale = 1.5)
# ggsave("Figure_6.pdf", width = 20, height = 5, units = "cm", f, scale = 1.5)
# ggsave("Figure_6.jpg", width = 20, height = 5, units = "cm", f, scale = 1.5)
df <- as.data.frame(t(com))
df <- df %>%
rownames_to_column(var = "OTU_ID")
df <- left_join(df_types, df, by = "OTU_ID") %>%
column_to_rownames(var = "OTU_ID") %>%
filter(Types == "conditionally_rare_dominant") %>%
select(-Types)
There are 457 OTUs are conditionally rare/dominant.
Check the relative abundance of these conditionally rare/dominant at genus level
# add taxonomy info - Genera
df <- transform(merge(taxonomy[, -c(1:5)], df, by = "row.names"), row.names = Row.names, Row.names = NULL)
df <- df %>% select(-Species)
df <- aggregate(df[,-1], list(df$Genus), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- 100*df/rarefactiondepth
# remove these rows
row.names.remove <- c("", "uncultured", "uncultured soil bacterium")
df <- df[!(row.names(df) %in% row.names.remove), ]
# genus kept for plotting
row.names(df)
## [1] "Acidibacter"
## [2] "Actinomadura"
## [3] "ADurb.Bin063-1"
## [4] "Aeromicrobium"
## [5] "Alicyclobacillus"
## [6] "Ammoniphilus"
## [7] "Anaeromyxobacter"
## [8] "Bacillus"
## [9] "Blastocatella"
## [10] "Bradyrhizobium"
## [11] "Brevibacillus"
## [12] "Bryobacter"
## [13] "Candidatus Solibacter"
## [14] "Candidatus Udaeobacter"
## [15] "Chlorobi bacterium OLB5"
## [16] "Chryseobacterium"
## [17] "Clostridium sensu stricto 8"
## [18] "Cohnella"
## [19] "Conexibacter"
## [20] "Crossiella"
## [21] "Deinococcus"
## [22] "Dongia"
## [23] "Dyadobacter"
## [24] "Ellin6067"
## [25] "Exiguobacterium"
## [26] "Frankia"
## [27] "Gaiella"
## [28] "Gemmata"
## [29] "Gemmatirosa"
## [30] "Geodermatophilus"
## [31] "Labrys"
## [32] "Luteolibacter"
## [33] "Massilia"
## [34] "metagenome"
## [35] "Microbacterium"
## [36] "Microbispora"
## [37] "Microlunatus"
## [38] "Micromonospora"
## [39] "Microvirga"
## [40] "MND1"
## [41] "Mycobacterium"
## [42] "Nibrella"
## [43] "Nitrospira"
## [44] "Nocardia"
## [45] "Nocardioides"
## [46] "Nonomuraea"
## [47] "Nordella"
## [48] "Paenibacillus"
## [49] "Pantoea"
## [50] "Parviterribacter"
## [51] "Pedomicrobium"
## [52] "Pirellula"
## [53] "Pseudonocardia"
## [54] "RB41"
## [55] "Reyranella"
## [56] "Rubrobacter"
## [57] "Shimazuella"
## [58] "Singulisphaera"
## [59] "Solirubrobacter"
## [60] "Sphingobacterium"
## [61] "Sphingomonas"
## [62] "Stenotrophomonas"
## [63] "Streptomyces"
## [64] "Subgroup 10"
## [65] "Tellurimicrobium"
## [66] "Terrimonas"
## [67] "Tumebacillus"
## [68] "uncultured Aciditerrimonas sp."
## [69] "uncultured Acidobacteriales bacterium"
## [70] "uncultured actinobacterium"
## [71] "uncultured bacterium"
## [72] "uncultured beta proteobacterium"
## [73] "uncultured Chloroflexi bacterium"
## [74] "uncultured delta proteobacterium"
## [75] "uncultured Desulfuromonadales bacterium"
## [76] "uncultured Firmicutes bacterium"
## [77] "uncultured Gaiella sp."
## [78] "uncultured Gram-positive bacterium"
## [79] "uncultured Holophagae bacterium"
## [80] "uncultured organism"
## [81] "uncultured Pedosphaera sp."
## [82] "uncultured proteobacterium"
## [83] "uncultured Rubrobacterales bacterium"
## [84] "uncultured Rubrobacteria bacterium"
## [85] "uncultured Solirubrobacter sp."
## [86] "uncultured Syntrophobacteraceae bacterium"
df <- as.data.frame(t(df))
# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))
# remove last letter in the third column
group_info$X2 <- gsub('.{1}$', '', group_info$X2)
# combine metadata to the dominant phyla table
df <- data.frame(Treat = as.factor(group_info[,1]),
Day = as.factor(group_info[,2]),
df)
# write.csv(df, "conditionally_rare_dominant_genera.csv")
# pivot data frame from wide to long
df1 <- df %>%
pivot_longer(cols = 3:ncol(df),
names_to = "Genus",
values_to = "value")
head(df1)
## # A tibble: 6 × 4
## Treat Day Genus value
## <fct> <fct> <chr> <dbl>
## 1 T1 0 Acidibacter 0
## 2 T1 0 Actinomadura 0
## 3 T1 0 ADurb.Bin063.1 0
## 4 T1 0 Aeromicrobium 0
## 5 T1 0 Alicyclobacillus 0
## 6 T1 0 Ammoniphilus 0.290
df1 <- df1 %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180")))
# %>%
# group_by(Treat, Day, Genus) %>%
# summarise(avg = mean(value, na.rm=T))
pd <- position_dodge(0.2)
(p <- ggplot(df1, aes(x = Day, y = value, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_wrap(~Genus, nrow = 11, scales = "free_y") +
labs(title = "Conditionally rare/dominant genera", x = "Day", y = "Relative abundance (%)") +
mytheme +
theme(legend.position = c(0.8, 0.03)) +
theme(strip.text = element_text(face = "italic")))
# ggsave("genus_conditionally_rare_dominant.pdf", width = 32, height = 30, units = "cm", p, scale = 1.5)
df2 <- df1 %>% filter(
Genus %in% c("Nocardioides", "Nordella", "Parviterribacter", "Rubrobacter", "Tellurimicrobium",
"uncultured.actinobacterium", "uncultured.Gaiella.sp.", "uncultured.Solirubrobacter.sp."))
df2
## # A tibble: 592 × 4
## Treat Day Genus value
## <fct> <fct> <chr> <dbl>
## 1 0 ton CTS/ha 0 Nocardioides 0
## 2 0 ton CTS/ha 0 Nordella 0
## 3 0 ton CTS/ha 0 Parviterribacter 0
## 4 0 ton CTS/ha 0 Rubrobacter 0
## 5 0 ton CTS/ha 0 Tellurimicrobium 0
## 6 0 ton CTS/ha 0 uncultured.actinobacterium 0
## 7 0 ton CTS/ha 0 uncultured.Gaiella.sp. 0
## 8 0 ton CTS/ha 0 uncultured.Solirubrobacter.sp. 0.131
## 9 0 ton CTS/ha 0 Nocardioides 0.448
## 10 0 ton CTS/ha 0 Nordella 0.0828
## # … with 582 more rows
(p2 <- ggplot(df2, aes(x = Day, y = value, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
scale_color_brewer(palette="PuBu", name = "CTS concentration") +
facet_wrap(~Genus, nrow = 4, scales = "free_y") +
labs(title = "", x = "Day", y = "Relative abundance (%)") +
mytheme +
theme(strip.text = element_text(face = "italic")))
# ggsave("Figure_4.pdf", width = 10, height = 12, units = "cm", p2, scale = 1.5)
# ggsave("Figure_4.jpg", width = 10, height = 12, units = "cm", p2, scale = 1.5)
# another way to show the results
# pivot data frame from wide to long
df1 <- df %>%
pivot_longer(cols = 3:ncol(df),
names_to = "Genus",
values_to = "value")
head(df1)
## # A tibble: 6 × 4
## Treat Day Genus value
## <fct> <fct> <chr> <dbl>
## 1 T1 0 Acidibacter 0
## 2 T1 0 Actinomadura 0
## 3 T1 0 ADurb.Bin063.1 0
## 4 T1 0 Aeromicrobium 0
## 5 T1 0 Alicyclobacillus 0
## 6 T1 0 Ammoniphilus 0.290
df1 <- df1 %>%
mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0", "2.5", "5", "10", "20"))) %>%
mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180")))
df2 <- df1 %>% filter(
Genus %in% c("Nocardioides", "Nordella", "Parviterribacter", "Rubrobacter", "Tellurimicrobium",
"uncultured.actinobacterium", "uncultured.Gaiella.sp.", "uncultured.Solirubrobacter.sp."))
df2
## # A tibble: 592 × 4
## Treat Day Genus value
## <fct> <fct> <chr> <dbl>
## 1 0 0 Nocardioides 0
## 2 0 0 Nordella 0
## 3 0 0 Parviterribacter 0
## 4 0 0 Rubrobacter 0
## 5 0 0 Tellurimicrobium 0
## 6 0 0 uncultured.actinobacterium 0
## 7 0 0 uncultured.Gaiella.sp. 0
## 8 0 0 uncultured.Solirubrobacter.sp. 0.131
## 9 0 0 Nocardioides 0.448
## 10 0 0 Nordella 0.0828
## # … with 582 more rows
# %>%
# group_by(Treat, Day, Genus) %>%
# summarise(avg = mean(value, na.rm=T))
(p3 <- ggplot(df2, aes(x = Treat, y = value, group = Treat, colour = Treat)) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) +
stat_summary(fun = "mean", geom = "point", size = 3, alpha = 0.8, position = pd) +
scale_color_brewer(palette="PuBu") +
guides(col = FALSE) +
facet_wrap(~Genus, nrow = 4, scales = "free_y") +
labs(title = "", x = "CTS concentration (ton CTS/ha)", y = "Relative abundance (%)") +
mytheme +
theme(strip.text = element_text(face = "italic")))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
# ggsave("Figure_4_new.pdf", width = 8, height = 12, units = "cm", p3, scale = 1.5)
# ggsave("Figure_4_new.jpg", width = 8, height = 12, units = "cm", p3, scale = 1.5)
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpmisc_0.5.1 ggpp_0.4.5 ggpubr_0.5.0 RColorBrewer_1.1-3
## [5] ape_5.6-2 vegan_2.6-4 lattice_0.20-45 permute_0.9-7
## [9] forcats_0.5.2 stringr_1.4.1 dplyr_1.0.10 purrr_0.3.5
## [13] readr_2.1.3 tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0
## [17] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-157 fs_1.5.2 lubridate_1.8.0
## [4] httr_1.4.4 tools_4.2.1 backports_1.4.1
## [7] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
## [10] DBI_1.1.3 mgcv_1.8-40 colorspace_2.0-3
## [13] withr_2.5.0 gridExtra_2.3 tidyselect_1.2.0
## [16] compiler_4.2.1 cli_3.4.1 rvest_1.0.3
## [19] quantreg_5.94 SparseM_1.81 xml2_1.3.3
## [22] labeling_0.4.2 sass_0.4.2 scales_1.2.1
## [25] digest_0.6.30 rmarkdown_2.17 pkgconfig_2.0.3
## [28] htmltools_0.5.3 dbplyr_2.2.1 fastmap_1.1.0
## [31] highr_0.9 rlang_1.0.6 readxl_1.4.1
## [34] rstudioapi_0.14 farver_2.1.1 jquerylib_0.1.4
## [37] generics_0.1.3 jsonlite_1.8.2 car_3.1-1
## [40] googlesheets4_1.0.1 confintr_0.2.0 magrittr_2.0.3
## [43] polynom_1.4-1 Matrix_1.5-3 Rcpp_1.0.9
## [46] munsell_0.5.0 fansi_1.0.3 abind_1.4-5
## [49] lifecycle_1.0.3 stringi_1.7.8 yaml_2.3.6
## [52] carData_3.0-5 MASS_7.3-57 grid_4.2.1
## [55] parallel_4.2.1 crayon_1.5.2 cowplot_1.1.1
## [58] haven_2.5.1 splines_4.2.1 hms_1.1.2
## [61] knitr_1.40 pillar_1.8.1 boot_1.3-28
## [64] ggsignif_0.6.4 reprex_2.0.2 glue_1.6.2
## [67] evaluate_0.17 modelr_0.1.9 vctrs_0.5.0
## [70] tzdb_0.3.0 MatrixModels_0.5-1 cellranger_1.1.0
## [73] gtable_0.3.1 assertthat_0.2.1 cachem_1.0.6
## [76] xfun_0.34 broom_1.0.1 rstatix_0.7.1
## [79] survival_3.3-1 googledrive_2.0.0 gargle_1.2.1
## [82] cluster_2.1.3 ellipsis_0.3.2